- Notifications
You must be signed in to change notification settings - Fork 849
/
Copy pathOML4Py Regression GLM.dsnb
executable file
·1 lines (1 loc) · 174 KB
/
OML4Py Regression GLM.dsnb
1
[{"layout":null,"template":null,"templateConfig":null,"name":"OML4Py Regression GLM","description":null,"readOnly":false,"type":"medium","paragraphs":[{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":null,"title":null,"message":[],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":0,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":null},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","## Regression Modeling to Predict Numerical Values using Generalized Linear Model","","This notebook shows how to predict numerical values using multiple regression. Given demographic, purchase, and affinity card data for a set of customers, predict the number of years a customer remains at the same residence, as found in column YRS_RESIDENCE - a continuous variable. We will use the Generalized Linear Model algorithm. All processing occurs inside Oracle Autonomous Database.","","Copyright (c) 2024 Oracle Corporation ","###### <a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a>","---"],"enabled":true,"result":{"startTime":1704380959872,"interpreter":"md.low","endTime":1704380959962,"results":[{"message":"<h2 id=\"regression-modeling-to-predict-numerical-values-using-generalized-linear-model\">Regression Modeling to Predict Numerical Values using Generalized Linear Model<\/h2>\n<p>This notebook shows how to predict numerical values using multiple regression. Given demographic, purchase, and affinity card data for a set of customers, predict the number of years a customer remains at the same residence, as found in column YRS_RESIDENCE - a continuous variable. We will use the Generalized Linear Model algorithm. 
All processing occurs inside Oracle Autonomous Database.<\/p>\n<p>Copyright (c) 2024 Oracle Corporation<\/p>\n<h6 id=\"the-universal-permissive-license-upl-version-10\"><a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a><\/h6>\n<hr />\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":9,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md ","<dl>","<img src=\"https://www.oracle.com/technetwork/database/options/advanced-analytics/regression-5663170.jpg\" alt=\"OML Notebooks\" width=\"250\"/>","<\/dl>"],"enabled":true,"result":{"startTime":1704380960098,"interpreter":"md.low","endTime":1704380960187,"results":[{"message":"<dl>\n<img src=\"https://www.oracle.com/technetwork/database/options/advanced-analytics/regression-5663170.jpg\" alt=\"OML Notebooks\" width=\"250\"/>\n<\/dl>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":3,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"For more information...","message":["%md","","* <a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADB Documentation<\/a>","* <a href=\"https://github.com/oracle-samples/oracle-db-examples/tree/main/machine-learning\" target=\"_blank\">OML folder on Oracle GitHub<\/a>","* <a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a>","* <a 
href=\"https://www.oracle.com/goto/ml-regression\" target=\"_blank\">OML Regression<\/a>","* <a href=\"https://oracle.com/goto/ml-generalized-linear-model\" target=\"_blank\">OML Generalized Linear Model (GLM)<\/a>"],"enabled":true,"result":{"startTime":1704380960339,"interpreter":"md.low","endTime":1704380960439,"results":[{"message":"<ul>\n<li><a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADB Documentation<\/a><\/li>\n<li><a href=\"https://github.com/oracle-samples/oracle-db-examples/tree/main/machine-learning\" target=\"_blank\">OML folder on Oracle GitHub<\/a><\/li>\n<li><a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a><\/li>\n<li><a href=\"https://www.oracle.com/goto/ml-regression\" target=\"_blank\">OML Regression<\/a><\/li>\n<li><a href=\"https://oracle.com/goto/ml-generalized-linear-model\" target=\"_blank\">OML Generalized Linear Model (GLM)<\/a><\/li>\n<\/ul>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Import libraries and set display options","message":["%python","","import pandas as pd","import oml","","pd.set_option('display.max_rows', 500)","pd.set_option('display.max_columns', 500)","pd.set_option('display.width', 
1000)"],"enabled":true,"result":{"startTime":1704380960578,"interpreter":"python.low","endTime":1704380960687,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":4,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Prepare the dataset combining customer data with supplemental demographics","message":["%python","","CUSTOMERS = oml.sync(query = 'SELECT CUST_ID, CUST_GENDER, CUST_MARITAL_STATUS, CUST_YEAR_OF_BIRTH, CUST_INCOME_LEVEL, CUST_CREDIT_LIMIT FROM SH.CUSTOMERS')","DEMO_DF = oml.sync(query = \"\"\"SELECT CUST_ID, EDUCATION, AFFINITY_CARD, HOUSEHOLD_SIZE, OCCUPATION, YRS_RESIDENCE, Y_BOX_GAMES"," FROM SH.SUPPLEMENTARY_DEMOGRAPHICS\"\"\")","CUST_DF = CUSTOMERS.merge(DEMO_DF, how = \"inner\", on = 'CUST_ID',suffixes = [\"\",\"\"])"],"enabled":true,"result":{"startTime":1704380960807,"interpreter":"python.low","endTime":1704380960913,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":8,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Display a few rows from CUST_DF","message":["%python","","z.show(CUST_DF.head())"],"enabled":true,"result":{"startTime":1704380961040,"interpreter":"python.low","endTime":1704380961190,"results":[{"message":"CUST_ID\tCUST_GENDER\tCUST_MARITAL_STATUS\tCUST_YEAR_OF_BIRTH\tCUST_INCOME_LEVEL\tCUST_CREDIT_LIMIT\tEDUCATION\tAFFINITY_CARD\tHOUSEHOLD_SIZE\tOCCUPATION\tYRS_RESIDENCE\tY_BOX_GAMES\n100134\tF\tDivorc.\t1965\tL: 300,000 and above\t9000\tAssoc-A\t0\t2\tCleric.\t2\t0\n102828\tF\tNeverM\t1967\tE: 90,000 - 
109,999\t10000\tHS-grad\t0\t1\tMachine\t4\t0\n101232\tM\tNeverM\t1979\tJ: 190,000 - 249,999\t9000\t< Bach.\t0\t1\tOther\t2\t1\n100696\tM\tMarried\t1971\tF: 110,000 - 129,999\t7000\tProfsc\t1\t3\tProf.\t3\t0\n103948\tM\tNeverM\t1966\tJ: 190,000 - 249,999\t9000\t< Bach.\t0\t1\tCleric.\t4\t0\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Split data into randomly selected 60% sample for train and 40% test","message":["%python","","TRAIN, TEST = CUST_DF.split(ratio = (0.6,0.4))","TRAIN_X = TRAIN.drop('YRS_RESIDENCE')","TRAIN_Y = TRAIN['YRS_RESIDENCE']","TEST_X = TEST.drop('YRS_RESIDENCE')","TEST_Y = TEST['YRS_RESIDENCE']"],"enabled":true,"result":{"startTime":1704380961322,"interpreter":"python.low","endTime":1704380961497,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","### Build a GLM regression model for predicting YRS_RESIDENCE","","---"],"enabled":true,"result":{"startTime":1704380961611,"interpreter":"md.low","endTime":1704380961700,"results":[{"message":"<h3 id=\"build-a-glm-regression-model-for-predicting-yrs_residence\">Build a GLM regression model for predicting YRS_RESIDENCE<\/h3>\n<hr 
/>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":"[{\"raw\":{\"height\":300,\"lastColumns\":[],\"version\":1}}]","hideInIFrame":false,"selectedVisualization":"raw","title":" Using the default options","message":["%python","","settings = {}","","try:"," oml.drop(model = 'GLM_REGRESSION_MODEL')","except:"," print('No such model')"," ","glm_mod = oml.glm(\"regression\", **settings)","glm_mod.fit(TRAIN_X, TRAIN_Y, model_name = 'GLM_REGRESSION_MODEL', case_id = 'CUST_ID')"],"enabled":true,"result":{"startTime":1704380961817,"interpreter":"python.low","endTime":1704380963842,"results":[{"message":"\nModel Name: GLM_REGRESSION_MODEL\n\nModel Owner: OMLUSER\n\nAlgorithm Name: Generalized Linear Model\n\nMining Function: REGRESSION\n\nTarget: YRS_RESIDENCE\n\nSettings: \n setting name setting value\n0 ALGO_NAME ALGO_GENERALIZED_LINEAR_MODEL\n1 GLMS_CONF_LEVEL .95\n2 GLMS_FTR_GENERATION GLMS_FTR_GENERATION_DISABLE\n3 GLMS_FTR_SELECTION GLMS_FTR_SELECTION_DISABLE\n4 ODMS_DETAILS ODMS_ENABLE\n5 ODMS_MISSING_VALUE_TREATMENT ODMS_MISSING_VALUE_AUTO\n6 ODMS_SAMPLING ODMS_SAMPLING_DISABLE\n7 PREP_AUTO ON\n\nComputed Settings: \n setting name setting value\n0 GLMS_CONV_TOLERANCE .0000050000000000000004\n1 GLMS_NUM_ITERATIONS 30\n2 GLMS_RIDGE_REGRESSION GLMS_RIDGE_REG_DISABLE\n3 GLMS_SOLVER GLMS_SOLVER_CHOL\n\nGlobal Statistics: \n attribute name attribute value\n0 ADJUSTED_R_SQUARE 0.577297\n1 AIC 1133.61\n2 COEFF_VAR 30.4233\n3 CONVERGED YES\n4 CORRECTED_TOTAL_DF 2708\n5 CORRECTED_TOT_SS 9534.73\n6 DEPENDENT_MEAN 4.00997\n7 ERROR_DF 2652\n8 ERROR_MEAN_SQUARE 1.48832\n9 ERROR_SUM_SQUARES 3947.01\n10 F_VALUE 67.0427\n11 GMSEP 1.52032\n12 HOCKING_SP 0.000561417\n13 J_P 1.51963\n14 MODEL_DF 56\n15 MODEL_F_P_VALUE 0\n16 MODEL_MEAN_SQUARE 99.7807\n17 
MODEL_SUM_SQUARES 5587.72\n18 NUM_PARAMS 57\n19 NUM_ROWS 2709\n20 RANK_DEFICIENCY 0\n21 ROOT_MEAN_SQ 1.21997\n22 R_SQ 0.586038\n23 SBIC 1470.16\n24 VALID_COVARIANCE_MATRIX YES\n\nAttributes: \nAFFINITY_CARD\nCUST_CREDIT_LIMIT\nCUST_GENDER\nCUST_INCOME_LEVEL\nCUST_MARITAL_STATUS\nCUST_YEAR_OF_BIRTH\nEDUCATION\nHOUSEHOLD_SIZE\nOCCUPATION\nY_BOX_GAMES\n\nPartition: NO\n\nCoefficients: \n\n attribute name attribute value coefficient std error t value p value significance code\n0 (Intercept) None 153.324901 5.433089 28.220575 0.000000e+00 ***\n1 AFFINITY_CARD None 0.340645 0.068606 4.965264 7.295958e-07 ***\n2 CUST_CREDIT_LIMIT None 0.000016 0.000008 2.034881 4.196216e-02 *\n3 CUST_GENDER F -0.113475 0.071621 -1.584376 1.132274e-01 \n4 CUST_INCOME_LEVEL A: Below 30,000 0.640696 0.198163 3.233176 1.239233e-03 **\n5 CUST_INCOME_LEVEL B: 30,000 - 49,999 0.144060 0.131455 1.095885 2.732286e-01 \n6 CUST_INCOME_LEVEL C: 50,000 - 69,999 0.217899 0.142770 1.526218 1.270749e-01 \n7 CUST_INCOME_LEVEL D: 70,000 - 89,999 -0.011105 0.153972 -0.072125 9.425082e-01 \n8 CUST_INCOME_LEVEL E: 90,000 - 109,999 0.205234 0.113004 1.816173 6.945669e-02 .\n9 CUST_INCOME_LEVEL F: 110,000 - 129,999 -0.081483 0.106074 -0.768174 4.424523e-01 \n10 CUST_INCOME_LEVEL G: 130,000 - 149,999 0.164336 0.103530 1.587333 1.125565e-01 \n11 CUST_INCOME_LEVEL H: 150,000 - 169,999 0.289081 0.102576 2.818204 4.865106e-03 **\n12 CUST_INCOME_LEVEL I: 170,000 - 189,999 -0.066614 0.094168 -0.707398 4.793812e-01 \n13 CUST_INCOME_LEVEL K: 250,000 - 299,999 0.012108 0.093756 0.129141 8.972561e-01 \n14 CUST_INCOME_LEVEL L: 300,000 and above -0.003457 0.080803 -0.042782 9.658787e-01 \n15 CUST_MARITAL_STATUS Divorc. 0.255132 0.324671 0.785816 4.320453e-01 \n16 CUST_MARITAL_STATUS Mabsent -0.231022 0.368937 -0.626183 5.312489e-01 \n17 CUST_MARITAL_STATUS Mar-AF -0.975312 0.720500 -1.353660 1.759602e-01 \n18 CUST_MARITAL_STATUS NeverM -0.012757 0.314184 -0.040604 9.676143e-01 \n19 CUST_MARITAL_STATUS Separ. 
-0.040635 0.344544 -0.117939 9.061248e-01 \n20 CUST_MARITAL_STATUS Widowed -0.945589 0.351749 -2.688250 7.227677e-03 **\n21 CUST_YEAR_OF_BIRTH None -0.075924 0.002775 -27.358344 0.000000e+00 ***\n22 EDUCATION 10th -0.055915 0.154137 -0.362762 7.168118e-01 \n23 EDUCATION 11th -0.281752 0.141721 -1.988072 4.690639e-02 *\n24 EDUCATION 12th -0.191792 0.204292 -0.938815 3.479110e-01 \n25 EDUCATION 1st-4th 0.187000 0.416084 0.449428 6.531600e-01 \n26 EDUCATION 5th-6th 0.312164 0.246440 1.266694 2.053761e-01 \n27 EDUCATION 7th-8th -0.528300 0.173677 -3.041848 2.374333e-03 **\n28 EDUCATION 9th -0.480972 0.176666 -2.722501 6.521376e-03 **\n29 EDUCATION < Bach. -0.205311 0.067205 -3.055003 2.272976e-03 **\n30 EDUCATION Assoc-A 0.213468 0.127929 1.668638 9.530715e-02 .\n31 EDUCATION Assoc-V 0.039609 0.124333 0.318572 7.500761e-01 \n32 EDUCATION Bach. 0.099102 0.079059 1.253519 2.101272e-01 \n33 EDUCATION Masters 0.335463 0.135236 2.480566 1.317900e-02 *\n34 EDUCATION PhD 0.784167 0.263927 2.971147 2.993555e-03 **\n35 EDUCATION Presch. -1.130164 0.712683 -1.585789 1.129065e-01 \n36 EDUCATION Profsc 0.610080 0.176570 3.455173 5.585775e-04 ***\n37 HOUSEHOLD_SIZE 1 -0.438462 0.321774 -1.362637 1.731127e-01 \n38 HOUSEHOLD_SIZE 2 -0.115546 0.321970 -0.358872 7.197194e-01 \n39 HOUSEHOLD_SIZE 4-5 -0.696323 0.134620 -5.172492 2.482309e-07 ***\n40 HOUSEHOLD_SIZE 6-8 -0.351825 0.326693 -1.076929 2.816100e-01 \n41 HOUSEHOLD_SIZE 9+ -0.289468 0.332312 -0.871073 3.837929e-01 \n42 OCCUPATION ? -0.432258 0.127862 -3.380671 7.335587e-04 ***\n43 OCCUPATION Armed-F -0.524535 1.229877 -0.426494 6.697826e-01 \n44 OCCUPATION Cleric. 0.061129 0.096605 0.632768 5.269398e-01 \n45 OCCUPATION Crafts -0.043418 0.096684 -0.449077 6.534131e-01 \n46 OCCUPATION Exec. 
0.302740 0.096055 3.151724 1.641210e-03 **\n47 OCCUPATION Farming 0.217480 0.162922 1.334878 1.820308e-01 \n48 OCCUPATION Handler -0.327132 0.149622 -2.186389 2.887428e-02 *\n49 OCCUPATION House-s -1.237814 0.331445 -3.734594 1.919768e-04 ***\n50 OCCUPATION Machine -0.056752 0.118432 -0.479196 6.318385e-01 \n51 OCCUPATION Other -0.257805 0.101369 -2.543220 1.103994e-02 *\n52 OCCUPATION Prof. -0.140482 0.105139 -1.336155 1.816134e-01 \n53 OCCUPATION Protec. 0.028069 0.181098 0.154996 8.768363e-01 \n54 OCCUPATION TechSup -0.338445 0.132818 -2.548177 1.088468e-02 *\n55 OCCUPATION Transp. 0.296478 0.136628 2.169960 3.009840e-02 *\n56 Y_BOX_GAMES None -0.463632 0.083046 -5.582842 2.606260e-08 ***\n\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 '\n\nFit Details: \n\n name value\n0 ADJUSTED_R_SQUARE 0.577297\n1 AIC 1133.612459\n2 COEFF_VAR 30.423332\n3 CORRECTED_TOTAL_DF 2708.000000\n4 CORRECTED_TOT_SS 9534.730897\n5 DEPENDENT_MEAN 4.009967\n6 ERROR_DF 2652.000000\n7 ERROR_MEAN_SQUARE 1.488316\n8 ERROR_SUM_SQUARES 3947.013467\n9 F_VALUE 67.042673\n10 GMSEP 1.520316\n11 HOCKING_SP 0.000561\n12 J_P 1.519631\n13 MODEL_CONVERGED 1.000000\n14 MODEL_DF 56.000000\n15 MODEL_F_P_VALUE 0.000000\n16 MODEL_MEAN_SQUARE 99.780668\n17 MODEL_SUM_SQUARES 5587.717430\n18 NUM_PARAMS 57.000000\n19 NUM_ROWS 2709.000000\n20 RANK_DEFICIENCY 0.000000\n21 ROOT_MEAN_SQ 1.219965\n22 R_SQ 0.586038\n23 SBIC 1470.159545\n24 VALID_COVARIANCE_MATRIX 1.000000\n\nRank: \n\n57\n\nDeviance: \n\n3947.013467\n\nAIC: \n\n1134\n\nNull Deviance: \n\n9534.730897\n\nDF Residual: \n\n2652.0\n\nDF Null: \n\n2708.0\n\nConverged: 
\n\nTrue\n\n\n","type":"TEXT"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","### Examples of possible setting overrides for GLM ","","If the user does not override the default settings, then relevant settings are determined by the algorithm.","","A complete list of settings can be found in the documentation links below:","- Algorithm Settings: <a href=\"https://docs.oracle.com/pls/topic/lookup?ctx=en/database/oracle/oracle-database/23/arpls&id=ARPLS-GUID-4E3665B9-B1C2-4F6B-AB69-A7F353C70F5C\" onclick=\"return ! window.open('https://docs.oracle.com/pls/topic/lookup?ctx=en/database/oracle/oracle-database/23/arpls&id=ARPLS-GUID-4E3665B9-B1C2-4F6B-AB69-A7F353C70F5C');\">Generalized Linear Model<\/a>","","- Shared Settings: <a href=\"https://docs.oracle.com/en/database/oracle/oracle-database/23/arpls/DBMS_DATA_MINING.html#GUID-24047A09-0542-4870-91D8-329F28B0ED75\" onclick=\"return ! window.open('https://docs.oracle.com/en/database/oracle/oracle-database/23/arpls/DBMS_DATA_MINING.html#GUID-24047A09-0542-4870-91D8-329F28B0ED75');\">All algorithms<\/a>","","- Specify a row weight column ","> 'ODMS_ROW_WEIGHT_COLUMN_NAME' : '&lt;row_weight_column_name&gt;'","- Specify a missing value treatment method for the training data. This setting does not affect the scoring data. The default value is `ODMS_MISSING_VALUE_AUTO`. The option `ODMS_MISSING_VALUE_MEAN_MODE` replaces missing values with the mean (numeric attributes) or the mode (categorical attributes) both at build time and apply time where appropriate. The option `ODMS_MISSING_VALUE_AUTO` performs different strategies for different algorithms. 
When `ODMS_MISSING_VALUE_TREATMENT` is set to `ODMS_MISSING_VALUE_DELETE_ROW`, the rows in the training data that contain missing values are deleted. However, if you want to replicate this missing value treatment in the scoring data, then you must perform the transformation explicitly.","> 'ODMS_MISSING_VALUE_TREATMENT' : 'ODMS_MISSING_VALUE_AUTO'","- Specify the confidence level for coefficient confidence intervals. The value required is between 0 and 1 (excluding the edges), and the default confidence level is 0.95. ","> 'GLMS_CONF_LEVEL' : '0.95'","- Turn ridge regression on or off. Enable or disable ridge regression. Ridge applies to both regression and classification machine learning functions. When ridge is enabled, prediction bounds are not produced by the `PREDICTION_BOUNDS` SQL function. Ridge may only be enabled when feature selection is not specified, or has been explicitly disabled. If ridge regression and feature selection are both explicitly enabled, then an exception is raised. By default the system turns it on if there is multicollinearity.","> 'GLMS_RIDGE_REGRESSION' : 'GLMS_RIDGE_REG_DISABLE'","- Specify the value of the ridge parameter. This setting is only used when the algorithm is configured to use ridge regression. If ridge regression is enabled internally by the algorithm, then the ridge parameter is determined by the algorithm. It requires a value greater than 0 (exclusive).","> 'GLMS_RIDGE_VALUE' : '2'","- Turn feature selection on or off. By default the system does not do feature selection. Also, feature generation requires feature selection to be enabled. Some options for feature selection are shown below","> 'GLMS_FTR_SELECTION' : 'GLMS_FTR_SELECTION_ENABLE'","- Specify whether or not feature generation is enabled for GLM. By default, feature generation is not enabled. Feature generation can only be enabled when feature selection is also enabled. 
","> 'GLMS_FTR_GENERATION' : 'GLMS_FTR_GENERATION_ENABLE'","- Specify whether feature generation is quadratic or cubic. When feature generation is enabled, the algorithm automatically chooses the most appropriate feature generation method based on the data. Options are `GLMS_FTR_GEN_QUADRATIC` or `GLMS_FTR_GEN_CUBIC`","> 'GLMS_FTR_GEN_METHOD' : 'GLMS_FTR_GEN_QUADRATIC'","- Specify feature selection penalty criterion for adding a feature to the model. When feature selection is enabled, the algorithm automatically chooses the penalty criterion based on the data. Options are `GLMS_FTR_SEL_AIC`, `GLMS_FTR_SEL_SBIC`, `GLMS_FTR_SEL_RIC`, `GLMS_FTR_SEL_ALPHA_INV`. ","> 'GLMS_FTR_SEL_CRIT' : 'GLMS_FTR_SEL_ALPHA_INV'","- Specify the maximum number of features that can be selected for the final model (when feature selection is enabled). By default, the algorithm limits the number of features to ensure sufficient memory. It requires a number between 0 (exclusive) and 2000 (inclusive).","> 'GLMS_MAX_FEATURES' : '30'","- Specify whether pruning is enabled or disabled for features in the final model. Pruning is based on T-Test statistics for linear regression, or Wald Test statistics for logistic regression. Features are pruned in a loop until all features are statistically significant with respect to the full data. When feature selection is enabled, the algorithm automatically performs pruning based on the data.","> 'GLMS_PRUNE_MODEL' : 'GLMS_PRUNE_MODEL_ENABLE'","- Specify the target value used as the `reference class` in a binary logistic regression model. Probabilities are produced for the other class. By default, the algorithm chooses the value with the highest prevalence (the most cases) for the reference class.","> 'GLMS_REFERENCE_CLASS_NAME' : '&lt;target_value&gt;'","- Specify whether to enable or disable row diagnostics. The default is `GLMS_ROW_DIAG_DISABLE`","> 'GLMS_ROW_DIAGNOSTICS' : 'GLMS_ROW_DIAG_ENABLE'","- Specify the convergence tolerance setting of the GLM algorithm. 
The default value is system-determined, and the value required is between 0 and 1 (exclusive).","> 'GLMS_CONV_TOLERANCE' : '0.0001'","- Specify the maximum number of iterations for the GLM algorithm. The default value is system-determined, and it requires a positive integer.","> 'GLMS_NUM_ITERATIONS' : '200'","- Specify the number of rows in a batch used by the SGD solver. The value of this parameter sets the size of the batch for the SGD solver. An input of 0 triggers a data-driven batch size estimate; otherwise, it requires a positive integer. The default is 2,000.","> 'GLMS_BATCH_ROWS' : '2000'","- Specify the GLM solver to use. The solver cannot be selected if the `GLMS_FTR_SELECTION` setting is enabled. The default value is system-determined, and the options are `GLMS_SOLVER_SGD` (Stochastic Gradient Descent), `GLMS_SOLVER_CHOL` (Cholesky), `GLMS_SOLVER_QR` (QR decomposition) and `GLMS_SOLVER_LBFGS_ADMM` (L-BFGS with ADMM, the Alternating Direction Method of Multipliers)","> 'GLMS_SOLVER' : 'GLMS_SOLVER_SGD'","- Specify whether to use the sparse solver if it is available. The default value is GLMS_SPARSE_SOLVER_DISABLE. ","> 'GLMS_SPARSE_SOLVER' : 'GLMS_SPARSE_SOLVER_ENABLE'","","","<strong> For Oracle release 23ai and newer. <\/strong>","- Specify which GLM Link Function to use. This setting allows the user to specify the link function for building a GLM model. The link functions are specific to the mining function. "," For classification, the options are `GLMS_LOGIT_LINK` (default), `GLMS_PROBIT_LINK`, `GLMS_CLOGLOG_LINK` or `GLMS_CAUCHIT_LINK`. 
For regression, the default is `GLMS_IDENTITY_LINK (default)`.","> 'GLMS_LINK_FUNCTION' : 'GLMS_LOGIT_LINK'"],"enabled":true,"result":{"startTime":1715317718368,"interpreter":"md.medium","endTime":1715317718560,"results":[{"message":"<h3 id=\"examples-of-possible-setting-overrides-for-glm\">Examples of possible setting overrides for GLM<\/h3>\n<p>If the user does not override the default settings, then relevant settings are determined by the algorithm.<\/p>\n<p>A complete list of settings can be found in the Documentation link:<\/p>\n<ul>\n<li>\n<p>Algorithm Settings: <a href=\"https://docs.oracle.com/pls/topic/lookup?ctx=en/database/oracle/oracle-database/23/arpls&id=ARPLS-GUID-4E3665B9-B1C2-4F6B-AB69-A7F353C70F5C\" onclick=\"return ! window.open('https://docs.oracle.com/pls/topic/lookup?ctx=en/database/oracle/oracle-database/23/arpls&id=ARPLS-GUID-4E3665B9-B1C2-4F6B-AB69-A7F353C70F5C');\">Generalized Linear Model<\/a><\/p>\n<\/li>\n<li>\n<p>Shared Settings: <a href=\"https://docs.oracle.com/en/database/oracle/oracle-database/23/arpls/DBMS_DATA_MINING.html#GUID-24047A09-0542-4870-91D8-329F28B0ED75\" onclick=\"return ! window.open('https://docs.oracle.com/en/database/oracle/oracle-database/23/arpls/DBMS_DATA_MINING.html#GUID-24047A09-0542-4870-91D8-329F28B0ED75');\">All algorithms<\/a><\/p>\n<\/li>\n<li>\n<p>Specify a row weight column<\/p>\n<\/li>\n<\/ul>\n<blockquote>\n<p>'ODMS_ROW_WEIGHT_COLUMN_NAME' : '<row_weight_column_name>'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify a missing value treatment method for the training data. This setting does not affect the scoring data. The default value is <code>ODMS_MISSING_VALUE_AUTO<\/code>. The option <code>ODMS_MISSING_VALUE_MEAN_MODE<\/code> replaces missing values with the mean (numeric attributes) or the mode (categorical attributes) both at build time and apply time where appropriate. The option <code>ODMS_MISSING_VALUE_AUTO<\/code> performs different strategies for different algorithms. 
When <code>ODMS_MISSING_VALUE_TREATMENT<\/code> is set to <code>ODMS_MISSING_VALUE_DELETE_ROW<\/code>, the rows in the training data that contain missing values are deleted. However, if you want to replicate this missing value treatment in the scoring data, then you must perform the transformation explicitly.<\/li>\n<\/ul>\n<blockquote>\n<p>'ODMS_MISSING_VALUE_TREATMENT' : 'ODMS_MISSING_VALUE_AUTO'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify the confidence level for coefficient confidence intervals. The value required is between 0 and 1 (excluding the edges), and the default confidence level is 0.95.<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_CONF_LEVEL' : '0.95'<\/p>\n<\/blockquote>\n<ul>\n<li>Turn ridge regression on or off. Enable or disable ridge regression. Ridge applies to both regression and classification machine learning functions. When ridge is enabled, prediction bounds are not produced by the <code>PREDICTION_BOUNDS<\/code> SQL function. Ridge may only be enabled when feature selection is not specified, or has been explicitly disabled. If ridge regression and feature selection are both explicitly enabled, then an exception is raised. By default the system turns it on if there is multicollinearity.<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_RIDGE_REGRESSION' : 'GLMS_RIDGE_REG_DISABLE'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify the value of the ridge parameter. This setting is only used when the algorithm is configured to use ridge regression. If ridge regression is enabled internally by the algorithm, then the ridge parameter is determined by the algorithm. It requires a value greater than 0 (exclusive).<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_RIDGE_VALUE' : '2'<\/p>\n<\/blockquote>\n<ul>\n<li>Turn feature selection on or off. By default the system does not do feature selection. Also, feature generation requires feature selection to be enabled. 
Some options for feature selection are show below<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_FTR_SELECTION' : 'GLMS_FTR_SELECTION_ENABLE'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify whether or not feature generation is enabled for GLM. By default, feature generation is not enabled. Feature generation can only be enabled when feature selection is also enabled.<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_FTR_GENERATION' : 'GLMS_FTR_GENERATION_ENABLE'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify whether feature generation is quadratic or cubic. When feature generation is enabled, the algorithm automatically chooses the most appropriate feature generation method based on the data. Options are <code>GLMS_FTR_GEN_QUADRATIC<\/code> or <code>GLMS_FTR_GEN_CUBIC<\/code><\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_FTR_GEN_METHOD' : 'GLMS_FTR_GEN_QUADRATIC'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify feature selection penalty criterion for adding a feature to the model. When feature selection is enabled, the algorithm automatically chooses the penalty criterion based on the data. Options are <code>GLMS_FTR_SEL_AIC<\/code>, <code>GLMS_FTR_SEL_SBIC<\/code>,<code>GLMS_FTR_SEL_RIC<\/code>,<code>GLMS_FTR_SEL_ALPHA_INV<\/code>.<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_FTR_SEL_CRIT' : 'GLMS_FTR_SEL_ALPHA_INV'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify the maximum number of features that can be selected for the final model (when feature selection is enabled). By default, the algorithm limits the number of features to ensure sufficient memory. It requires a number between 0 (exclusive) and 2000 (inclusive).<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_MAX_FEATURES' : '30'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify whether prune is enabled or disabled for features in the final model. Pruning is based on T-Test statistics for linear regression, or Wald Test statistics for logistic regression. Features are pruned in a loop until all features are statistically significant with respect to the full data. 
When feature selection is enabled, the algorithm automatically performs pruning based on the data.<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_PRUNE_MODEL' : 'GLMS_PRUNE_MODEL_ENABLE'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify the target value used as the <code>reference class<\/code> in a binary logistic regression model. Probabilities are produced for the other class. By default, the algorithm chooses the value with the highest prevalence (the most cases) for the reference class.<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_REFERENCE_CLASS_NAME' : '<target_value>'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify whether to enable or disable row diagnostics. The default is <code>GLMS_ROW_DIAG_DISABLE<\/code><\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_ROW_DIAGNOSTICS' : 'GLMS_ROW_DIAG_ENABLE'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify the convergence tolerance setting of the GLM algorithm. The default value is system-determined, and the value required is between 0 and 1 (exclusive).<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_CONV_TOLERANCE' : '0.0001'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify the maximum number of iterations for the GLM algorithm. The default value is system-determined, and it requires a positive integer.<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_NUM_ITERATIONS' : '200'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify the number of rows in a batch used by the SGD solver. The value of this parameter sets the size of the batch for the SGD solver. An input of 0 triggers a data driven batch size estimate, and it requires a positive integer. The default is 2,000.<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_BATCH_ROWS' : '2000'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify the GLM solver to use. The solver cannot be selected if <code>GLMS_FTR_SELECTION<\/code> setting is enabled. 
The default value is system-determined, and the options are <code>GLMS_SOLVER_SGD<\/code> (Stochastic Gradient Descent), <code>GLMS_SOLVER_CHOL<\/code> (Cholesky), <code>GLMS_SOLVER_QR<\/code> (QR decomposition) and <code>GLMS_SOLVER_LBFGS_ADMM<\/code> (L-BFGS with ADMM, the Alternating Direction Method of Multipliers)<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_SOLVER' : 'GLMS_SOLVER_SGD'<\/p>\n<\/blockquote>\n<ul>\n<li>Specify whether to use the sparse solver if it is available. The default value is GLMS_SPARSE_SOLVER_DISABLE.<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_SPARSE_SOLVER' : 'GLMS_SPARSE_SOLVER_ENABLE'<\/p>\n<\/blockquote>\n<p><strong> For Oracle release 23ai and newer. <\/strong><\/p>\n<ul>\n<li>Specify which GLM Link Function to use. This setting allows the user to specify the link function for building a GLM model. The link functions are specific to the mining function.\nFor classification, the options are <code>GLMS_LOGIT_LINK<\/code> (default), <code>GLMS_PROBIT_LINK<\/code>, <code>GLMS_CLOGLOG_LINK<\/code> or <code>GLMS_CAUCHIT_LINK<\/code>. 
For regression, the default is <code>GLMS_IDENTITY_LINK (default)<\/code>.<\/li>\n<\/ul>\n<blockquote>\n<p>'GLMS_LINK_FUNCTION' : 'GLMS_LOGIT_LINK'<\/p>\n<\/blockquote>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":"[{\"raw\":{\"height\":300,\"lastColumns\":[],\"version\":1}}]","hideInIFrame":false,"selectedVisualization":"raw","title":"Using options for Feature Selection and Generation","message":["%python","","settings = {'GLMS_RIDGE_REGRESSION':'GLMS_RIDGE_REG_DISABLE',"," 'GLMS_FTR_SELECTION':'GLMS_FTR_SELECTION_ENABLE',"," 'GLMS_FTR_GEN_METHOD':'GLMS_FTR_GEN_QUADRATIC',"," 'GLMS_FTR_GENERATION': 'GLMS_FTR_GENERATION_ENABLE', "," 'GLMS_FTR_SEL_CRIT':'GLMS_FTR_SEL_AIC',"," 'GLMS_NUM_ITERATIONS':'50',"," 'GLMS_MAX_FEATURES':'30',"," 'GLMS_PRUNE_MODEL':'GLMS_PRUNE_MODEL_ENABLE',"," 'GLMS_ROW_DIAGNOSTICS':'GLMS_ROW_DIAG_DISABLE',"," 'GLMS_CONV_TOLERANCE':'0.0001',"," 'GLMS_BATCH_ROWS':'2000'}","","try:"," oml.drop(model = 'GLM_REGRESSION_MODEL')","except:"," print('No such model')"," ","glm_mod = oml.glm(\"regression\", **settings)","glm_mod.fit(TRAIN_X, TRAIN_Y, model_name = 'GLM_REGRESSION_MODEL', case_id = 'CUST_ID')"],"enabled":true,"result":{"startTime":1704380964208,"interpreter":"python.low","endTime":1704380966242,"results":[{"message":"\nModel Name: GLM_REGRESSION_MODEL\n\nModel Owner: OMLUSER\n\nAlgorithm Name: Generalized Linear Model\n\nMining Function: REGRESSION\n\nTarget: YRS_RESIDENCE\n\nSettings: \n setting name setting value\n0 ALGO_NAME ALGO_GENERALIZED_LINEAR_MODEL\n1 GLMS_BATCH_ROWS 2000\n2 GLMS_CONF_LEVEL .95\n3 GLMS_CONV_TOLERANCE 0.0001\n4 GLMS_FTR_GENERATION GLMS_FTR_GENERATION_ENABLE\n5 GLMS_FTR_GEN_METHOD GLMS_FTR_GEN_QUADRATIC\n6 GLMS_FTR_SELECTION GLMS_FTR_SELECTION_ENABLE\n7 GLMS_FTR_SEL_CRIT 
GLMS_FTR_SEL_AIC\n8 GLMS_MAX_FEATURES 30\n9 GLMS_NUM_ITERATIONS 50\n10 GLMS_PRUNE_MODEL GLMS_PRUNE_MODEL_ENABLE\n11 GLMS_RIDGE_REGRESSION GLMS_RIDGE_REG_DISABLE\n12 GLMS_ROW_DIAGNOSTICS GLMS_ROW_DIAG_DISABLE\n13 GLMS_SELECT_BLOCK GLMS_SELECT_BLOCK_DISABLE\n14 ODMS_DETAILS ODMS_ENABLE\n15 ODMS_MISSING_VALUE_TREATMENT ODMS_MISSING_VALUE_AUTO\n16 ODMS_SAMPLING ODMS_SAMPLING_DISABLE\n17 PREP_AUTO ON\n\nGlobal Statistics: \n attribute name attribute value\n0 ADJUSTED_R_SQUARE 0.570989\n1 AIC 1141.24\n2 COEFF_VAR 30.6495\n3 CONVERGED YES\n4 CORRECTED_TOTAL_DF 2708\n5 CORRECTED_TOT_SS 9534.73\n6 DEPENDENT_MEAN 4.00997\n7 ERROR_DF 2685\n8 ERROR_MEAN_SQUARE 1.51053\n9 ERROR_SUM_SQUARES 4055.76\n10 F_VALUE 157.704\n11 GMSEP 1.52403\n12 HOCKING_SP 0.000562789\n13 J_P 1.52391\n14 MODEL_DF 23\n15 MODEL_F_P_VALUE 0\n16 MODEL_MEAN_SQUARE 238.216\n17 MODEL_SUM_SQUARES 5478.97\n18 NUM_PARAMS 24\n19 NUM_ROWS 2709\n20 ROOT_MEAN_SQ 1.22903\n21 R_SQ 0.574633\n22 SBIC 1282.95\n23 VALID_COVARIANCE_MATRIX YES\n\nAttributes: \nAFFINITY_CARD\nCUST_GENDER\nCUST_INCOME_LEVEL\nCUST_MARITAL_STATUS\nCUST_YEAR_OF_BIRTH\nEDUCATION\nHOUSEHOLD_SIZE\nOCCUPATION\nY_BOX_GAMES\n\nPartition: NO\n\nCoefficients: \n\n attribute name attribute value coefficient std error t value p value significance code\n0 (Intercept) None 151.619049 5.325376 28.471050 0.000000e+00 ***\n1 AFFINITY_CARD None 0.405005 0.065140 6.217413 5.842160e-10 ***\n2 CUST_GENDER F -0.156305 0.064863 -2.409772 1.602937e-02 *\n3 CUST_INCOME_LEVEL A: Below 30,000 0.480111 0.178824 2.684823 7.301432e-03 **\n4 CUST_INCOME_LEVEL F: 110,000 - 129,999 -0.185843 0.088004 -2.111773 3.479808e-02 *\n5 CUST_INCOME_LEVEL H: 150,000 - 169,999 0.210351 0.086174 2.440988 1.471132e-02 *\n6 CUST_MARITAL_STATUS Divorc. 0.224238 0.079465 2.821836 4.810077e-03 **\n7 CUST_MARITAL_STATUS Widowed -1.072453 0.153037 -7.007801 3.049532e-12 ***\n8 CUST_YEAR_OF_BIRTH None -0.075006 0.002719 -27.585654 0.000000e+00 ***\n9 EDUCATION < Bach. 
-0.167355 0.058745 -2.848861 4.421013e-03 **\n10 EDUCATION Masters 0.240118 0.121931 1.969301 4.902121e-02 *\n11 EDUCATION PhD 0.724599 0.254657 2.845393 4.469288e-03 **\n12 EDUCATION Profsc 0.507417 0.160317 3.165075 1.567788e-03 **\n13 HOUSEHOLD_SIZE 1 -0.380655 0.080496 -4.728864 2.374468e-06 ***\n14 HOUSEHOLD_SIZE 4-5 -0.671540 0.126990 -5.288148 1.335068e-07 ***\n15 HOUSEHOLD_SIZE 6-8 -0.292035 0.136270 -2.143066 3.219746e-02 *\n16 HOUSEHOLD_SIZE 9+ -0.210189 0.088008 -2.388277 1.699627e-02 *\n17 OCCUPATION ? -0.433972 0.114666 -3.784672 1.572781e-04 ***\n18 OCCUPATION Exec. 0.372300 0.075989 4.899416 1.018327e-06 ***\n19 OCCUPATION Handler -0.331908 0.136608 -2.429631 1.517937e-02 *\n20 OCCUPATION Other -0.274482 0.081512 -3.367383 7.695453e-04 ***\n21 OCCUPATION TechSup -0.268835 0.119675 -2.246382 2.476053e-02 *\n22 OCCUPATION Transp. 0.302057 0.121823 2.479479 1.321838e-02 *\n23 Y_BOX_GAMES None -0.483562 0.081466 -5.935759 3.301191e-09 ***\n\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 
0.1 ' ' 1 '\n\nFit Details: \n\n name value\n0 ADJUSTED_R_SQUARE 0.570989\n1 AIC 1141.242506\n2 COEFF_VAR 30.649500\n3 CORRECTED_TOTAL_DF 2708.000000\n4 CORRECTED_TOT_SS 9534.730897\n5 DEPENDENT_MEAN 4.009967\n6 ERROR_DF 2685.000000\n7 ERROR_MEAN_SQUARE 1.510527\n8 ERROR_SUM_SQUARES 4055.763666\n9 F_VALUE 157.703930\n10 GMSEP 1.524033\n11 HOCKING_SP 0.000563\n12 J_P 1.523909\n13 MODEL_CONVERGED 1.000000\n14 MODEL_DF 23.000000\n15 MODEL_F_P_VALUE 0.000000\n16 MODEL_MEAN_SQUARE 238.215967\n17 MODEL_SUM_SQUARES 5478.967231\n18 NUM_PARAMS 24.000000\n19 NUM_ROWS 2709.000000\n20 ROOT_MEAN_SQ 1.229035\n21 R_SQ 0.574633\n22 SBIC 1282.946542\n23 VALID_COVARIANCE_MATRIX 1.000000\n\nRank: \n\n24\n\nDeviance: \n\n4055.763666\n\nAIC: \n\n1141\n\nNull Deviance: \n\n9534.730897\n\nDF Residual: \n\n2685.0\n\nDF Null: \n\n2708.0\n\nConverged: \n\nTrue\n\n\n","type":"TEXT"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":"[{\"raw\":{\"height\":300,\"lastColumns\":[],\"version\":1}}]","hideInIFrame":false,"selectedVisualization":"raw","title":"Using options for Ridge Regression","message":["%python","","settings = {'GLMS_RIDGE_REGRESSION':'GLMS_RIDGE_REG_ENABLE',"," 'GLMS_FTR_SELECTION':'GLMS_FTR_SELECTION_DISABLE',"," 'GLMS_ROW_DIAGNOSTICS':'GLMS_ROW_DIAG_DISABLE',"," 'GLMS_CONV_TOLERANCE':'0.0001',"," 'GLMS_NUM_ITERATIONS':'50',"," 'GLMS_RIDGE_VALUE':'0.1',"," 'GLMS_BATCH_ROWS':'2000'}","","try:"," oml.drop(model = 'GLM_REGRESSION_MODEL')","except:"," print('No such model')"," ","glm_mod = oml.glm(\"regression\", **settings)","glm_mod.fit(TRAIN_X, TRAIN_Y, model_name = 'GLM_REGRESSION_MODEL', case_id = 'CUST_ID')"],"enabled":true,"result":{"startTime":1704380966365,"interpreter":"python.low","endTime":1704380968291,"results":[{"message":"\nModel Name: 
GLM_REGRESSION_MODEL\n\nModel Owner: OMLUSER\n\nAlgorithm Name: Generalized Linear Model\n\nMining Function: REGRESSION\n\nTarget: YRS_RESIDENCE\n\nSettings: \n setting name setting value\n0 ALGO_NAME ALGO_GENERALIZED_LINEAR_MODEL\n1 GLMS_BATCH_ROWS 2000\n2 GLMS_CONF_LEVEL .95\n3 GLMS_CONV_TOLERANCE 0.0001\n4 GLMS_FTR_GENERATION GLMS_FTR_GENERATION_DISABLE\n5 GLMS_FTR_SELECTION GLMS_FTR_SELECTION_DISABLE\n6 GLMS_NUM_ITERATIONS 50\n7 GLMS_RIDGE_REGRESSION GLMS_RIDGE_REG_ENABLE\n8 GLMS_RIDGE_VALUE 0.1\n9 GLMS_ROW_DIAGNOSTICS GLMS_ROW_DIAG_DISABLE\n10 GLMS_VIF_FOR_RIDGE GLMS_VIF_RIDGE_DISABLE\n11 ODMS_DETAILS ODMS_ENABLE\n12 ODMS_MISSING_VALUE_TREATMENT ODMS_MISSING_VALUE_AUTO\n13 ODMS_SAMPLING ODMS_SAMPLING_DISABLE\n14 PREP_AUTO ON\n\nComputed Settings: \n setting name setting value\n0 GLMS_SOLVER GLMS_SOLVER_CHOL\n\nGlobal Statistics: \n attribute name attribute value\n0 ADJUSTED_R_SQUARE 0.577186\n1 AIC 1134.32\n2 COEFF_VAR 30.4273\n3 CONVERGED YES\n4 CORRECTED_TOTAL_DF 2708\n5 CORRECTED_TOT_SS 9534.73\n6 DEPENDENT_MEAN 4.00997\n7 ERROR_DF 2652\n8 ERROR_MEAN_SQUARE 1.48871\n9 ERROR_SUM_SQUARES 3948.05\n10 F_VALUE 67.0127\n11 GMSEP 1.52071\n12 HOCKING_SP 0.000561564\n13 J_P 1.52003\n14 MODEL_DF 56\n15 MODEL_F_P_VALUE 0\n16 MODEL_MEAN_SQUARE 99.7622\n17 MODEL_SUM_SQUARES 5586.68\n18 NUM_PARAMS 57\n19 NUM_ROWS 2709\n20 RANK_DEFICIENCY 0\n21 ROOT_MEAN_SQ 1.22013\n22 R_SQ 0.58593\n23 SBIC 1470.87\n24 VALID_COVARIANCE_MATRIX YES\n\nAttributes: \nAFFINITY_CARD\nCUST_CREDIT_LIMIT\nCUST_GENDER\nCUST_INCOME_LEVEL\nCUST_MARITAL_STATUS\nCUST_YEAR_OF_BIRTH\nEDUCATION\nHOUSEHOLD_SIZE\nOCCUPATION\nY_BOX_GAMES\n\nPartition: NO\n\nCoefficients: \n\n attribute name attribute value coefficient std error t value p value significance code\n0 (Intercept) None 153.286770 5.432447 28.216893 0.000000e+00 ***\n1 AFFINITY_CARD None 0.341061 0.068599 4.971812 7.055932e-07 ***\n2 CUST_CREDIT_LIMIT None 0.000016 0.000008 2.031948 4.225836e-02 *\n3 CUST_GENDER F -0.113944 0.071593 -1.591559 
1.116029e-01 \n4 CUST_INCOME_LEVEL A: Below 30,000 0.638712 0.197875 3.227863 1.262389e-03 **\n5 CUST_INCOME_LEVEL B: 30,000 - 49,999 0.143608 0.131329 1.093496 2.742753e-01 \n6 CUST_INCOME_LEVEL C: 50,000 - 69,999 0.217494 0.142641 1.524762 1.274377e-01 \n7 CUST_INCOME_LEVEL D: 70,000 - 89,999 -0.011218 0.153834 -0.072924 9.418718e-01 \n8 CUST_INCOME_LEVEL E: 90,000 - 109,999 0.205015 0.112914 1.815669 6.953406e-02 .\n9 CUST_INCOME_LEVEL F: 110,000 - 129,999 -0.081756 0.105989 -0.771362 4.405610e-01 \n10 CUST_INCOME_LEVEL G: 130,000 - 149,999 0.164024 0.103451 1.585523 1.129668e-01 \n11 CUST_INCOME_LEVEL H: 150,000 - 169,999 0.288714 0.102497 2.816809 4.886220e-03 **\n12 CUST_INCOME_LEVEL I: 170,000 - 189,999 -0.066784 0.094095 -0.709746 4.779238e-01 \n13 CUST_INCOME_LEVEL K: 250,000 - 299,999 0.012075 0.093706 0.128866 8.974734e-01 \n14 CUST_INCOME_LEVEL L: 300,000 and above -0.003902 0.080750 -0.048323 9.614627e-01 \n15 CUST_MARITAL_STATUS Divorc. 0.253653 0.315765 0.803296 4.218758e-01 \n16 CUST_MARITAL_STATUS Mabsent -0.232063 0.360465 -0.643788 5.197688e-01 \n17 CUST_MARITAL_STATUS Mar-AF -0.942407 0.708334 -1.330455 1.834827e-01 \n18 CUST_MARITAL_STATUS NeverM -0.014300 0.305534 -0.046804 9.626733e-01 \n19 CUST_MARITAL_STATUS Separ. -0.041920 0.335911 -0.124795 9.006955e-01 \n20 CUST_MARITAL_STATUS Widowed -0.946343 0.343085 -2.758334 5.849528e-03 **\n21 CUST_YEAR_OF_BIRTH None -0.075905 0.002775 -27.354516 0.000000e+00 ***\n22 EDUCATION 10th -0.055951 0.154018 -0.363277 7.164271e-01 \n23 EDUCATION 11th -0.281551 0.141632 -1.987909 4.692445e-02 *\n24 EDUCATION 12th -0.191125 0.204022 -0.936783 3.489552e-01 \n25 EDUCATION 1st-4th 0.185111 0.413668 0.447488 6.545592e-01 \n26 EDUCATION 5th-6th 0.311115 0.245960 1.264899 2.060185e-01 \n27 EDUCATION 7th-8th -0.527250 0.173509 -3.038746 2.398826e-03 **\n28 EDUCATION 9th -0.480464 0.176480 -2.722481 6.521764e-03 **\n29 EDUCATION < Bach. 
-0.205203 0.067175 -3.054761 2.274804e-03 **\n30 EDUCATION Assoc-A 0.212537 0.127838 1.662556 9.651949e-02 .\n31 EDUCATION Assoc-V 0.039585 0.124268 0.318543 7.500980e-01 \n32 EDUCATION Bach. 0.098861 0.079016 1.251158 2.109872e-01 \n33 EDUCATION Masters 0.334172 0.135078 2.473920 1.342612e-02 *\n34 EDUCATION PhD 0.780157 0.263323 2.962736 3.076294e-03 **\n35 EDUCATION Presch. -1.093416 0.700902 -1.560013 1.188762e-01 \n36 EDUCATION Profsc 0.608335 0.176374 3.449124 5.711853e-04 ***\n37 HOUSEHOLD_SIZE 1 -0.436662 0.313081 -1.394725 1.632155e-01 \n38 HOUSEHOLD_SIZE 2 -0.113825 0.312996 -0.363662 7.161393e-01 \n39 HOUSEHOLD_SIZE 4-5 -0.696452 0.134493 -5.178353 2.406347e-07 ***\n40 HOUSEHOLD_SIZE 6-8 -0.349792 0.318628 -1.097805 2.723895e-01 \n41 HOUSEHOLD_SIZE 9+ -0.287699 0.323291 -0.889908 3.735959e-01 \n42 OCCUPATION ? -0.431388 0.127740 -3.377076 7.431725e-04 ***\n43 OCCUPATION Armed-F -0.475247 1.171912 -0.405531 6.851201e-01 \n44 OCCUPATION Cleric. 0.061619 0.096504 0.638513 5.231951e-01 \n45 OCCUPATION Crafts -0.043042 0.096574 -0.445692 6.558559e-01 \n46 OCCUPATION Exec. 0.303436 0.095954 3.162299 1.582974e-03 **\n47 OCCUPATION Farming 0.217302 0.162729 1.335361 1.818730e-01 \n48 OCCUPATION Handler -0.326269 0.149467 -2.182882 2.913193e-02 *\n49 OCCUPATION House-s -1.228783 0.330235 -3.720932 2.026102e-04 ***\n50 OCCUPATION Machine -0.056461 0.118308 -0.477237 6.332329e-01 \n51 OCCUPATION Other -0.257109 0.101257 -2.539175 1.116810e-02 *\n52 OCCUPATION Prof. -0.139071 0.105009 -1.324372 1.854937e-01 \n53 OCCUPATION Protec. 0.028677 0.180879 0.158544 8.740406e-01 \n54 OCCUPATION TechSup -0.337437 0.132700 -2.542856 1.105141e-02 *\n55 OCCUPATION Transp. 0.296709 0.136495 2.173768 2.981078e-02 *\n56 Y_BOX_GAMES None -0.463980 0.083037 -5.587607 2.536575e-08 ***\n\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 
0.1 ' ' 1 '\n\nFit Details: \n\n name value\n0 ADJUSTED_R_SQUARE 0.577186\n1 AIC 1134.321375\n2 COEFF_VAR 30.427312\n3 CORRECTED_TOTAL_DF 2708.000000\n4 CORRECTED_TOT_SS 9534.730897\n5 DEPENDENT_MEAN 4.009967\n6 ERROR_DF 2652.000000\n7 ERROR_MEAN_SQUARE 1.488705\n8 ERROR_SUM_SQUARES 3948.046492\n9 F_VALUE 67.012740\n10 GMSEP 1.520714\n11 HOCKING_SP 0.000562\n12 J_P 1.520029\n13 MODEL_CONVERGED 1.000000\n14 MODEL_DF 56.000000\n15 MODEL_F_P_VALUE 0.000000\n16 MODEL_MEAN_SQUARE 99.762222\n17 MODEL_SUM_SQUARES 5586.684405\n18 NUM_PARAMS 57.000000\n19 NUM_ROWS 2709.000000\n20 RANK_DEFICIENCY 0.000000\n21 ROOT_MEAN_SQ 1.220125\n22 R_SQ 0.585930\n23 SBIC 1470.868461\n24 VALID_COVARIANCE_MATRIX 1.000000\n\nRank: \n\n57\n\nDeviance: \n\n3948.046492\n\nAIC: \n\n1134\n\nNull Deviance: \n\n9534.730897\n\nDF Residual: \n\n2652.0\n\nDF Null: \n\n2708.0\n\nConverged: \n\nTrue\n\n\n","type":"TEXT"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Display model fit 
details","message":["%python","","z.show(glm_mod.fit_details.round(6))"],"enabled":true,"result":{"startTime":1704380968416,"interpreter":"python.low","endTime":1704380968541,"results":[{"message":"name\tvalue\nADJUSTED_R_SQUARE\t0.577186\nAIC\t1134.321375\nCOEFF_VAR\t30.427312\nCORRECTED_TOTAL_DF\t2708.0\nCORRECTED_TOT_SS\t9534.730897\nDEPENDENT_MEAN\t4.009967\nERROR_DF\t2652.0\nERROR_MEAN_SQUARE\t1.488705\nERROR_SUM_SQUARES\t3948.046492\nF_VALUE\t67.01274\nGMSEP\t1.520714\nHOCKING_SP\t0.000562\nJ_P\t1.520029\nMODEL_CONVERGED\t1.0\nMODEL_DF\t56.0\nMODEL_F_P_VALUE\t0.0\nMODEL_MEAN_SQUARE\t99.762222\nMODEL_SUM_SQUARES\t5586.684405\nNUM_PARAMS\t57.0\nNUM_ROWS\t2709.0\nRANK_DEFICIENCY\t0.0\nROOT_MEAN_SQ\t1.220125\nR_SQ\t0.58593\nSBIC\t1470.868461\nVALID_COVARIANCE_MATRIX\t1.0\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Display the coefficients","message":["%python","","z.show(glm_mod.coef.head(10).round(3))"],"enabled":true,"result":{"startTime":1704380968658,"interpreter":"python.low","endTime":1704380968782,"results":[{"message":"attribute name\tattribute value\tcoefficient\tstd error\tt value\tp value\n(Intercept)\tNone\t153.287\t5.432\t28.217\t0.0\nAFFINITY_CARD\tNone\t0.341\t0.069\t4.972\t0.0\nCUST_CREDIT_LIMIT\tNone\t0.0\t0.0\t2.032\t0.042\nCUST_YEAR_OF_BIRTH\tNone\t-0.076\t0.003\t-27.355\t0.0\nY_BOX_GAMES\tNone\t-0.464\t0.083\t-5.588\t0.0\nCUST_GENDER\tF\t-0.114\t0.072\t-1.592\t0.112\nCUST_INCOME_LEVEL\tA: Below 30,000\t0.639\t0.198\t3.228\t0.001\nCUST_INCOME_LEVEL\tB: 30,000 - 49,999\t0.144\t0.131\t1.093\t0.274\nCUST_INCOME_LEVEL\tC: 50,000 - 69,999\t0.217\t0.143\t1.525\t0.127\nCUST_INCOME_LEVEL\tD: 70,000 - 
89,999\t-0.011\t0.154\t-0.073\t0.942\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Make predictions using test data and display results","message":["%python","","RES_DF = glm_mod.predict(TEST_X, supplemental_cols = TEST)","","z.show(RES_DF[['PREDICTION', 'YRS_RESIDENCE'] + RES_DF.columns].head(10).round(2))"],"enabled":true,"result":{"startTime":1704380968897,"interpreter":"python.low","endTime":1704380969196,"results":[{"message":"PREDICTION\tYRS_RESIDENCE\tCUST_ID\tCUST_GENDER\tCUST_MARITAL_STATUS\tCUST_YEAR_OF_BIRTH\tCUST_INCOME_LEVEL\tCUST_CREDIT_LIMIT\tEDUCATION\tAFFINITY_CARD\tHOUSEHOLD_SIZE\tOCCUPATION\tYRS_RESIDENCE\tY_BOX_GAMES\tPREDICTION\n4.57\t2\t100134\tF\tDivorc.\t1965\tL: 300,000 and above\t9000\tAssoc-A\t0\t2\tCleric.\t2\t0\t4.57\n1.84\t2\t101232\tM\tNeverM\t1979\tJ: 190,000 - 249,999\t9000\t< Bach.\t0\t1\tOther\t2\t1\t1.84\n5.31\t5\t103791\tM\tDivorc.\t1952\tB: 30,000 - 49,999\t3000\tHS-grad\t0\t2\tProf.\t5\t0\t5.31\n2.23\t2\t102308\tM\tNeverM\t1980\tJ: 190,000 - 249,999\t11000\t< Bach.\t0\t2\tProf.\t2\t1\t2.23\n4.71\t4\t100558\tM\tMarried\t1964\tJ: 190,000 - 249,999\t11000\tAssoc-V\t1\t3\tMachine\t4\t0\t4.71\n3.1\t4\t103401\tM\tDivorc.\t1975\tI: 170,000 - 189,999\t10000\tHS-grad\t0\t2\tCrafts\t4\t1\t3.1\n6.83\t0\t102740\tF\tDivorc.\t1929\tK: 250,000 - 299,999\t15000\t10th\t0\t2\tOther\t0\t0\t6.83\n3.75\t3\t103829\tM\tDivorc.\t1973\tE: 90,000 - 109,999\t7000\tHS-grad\t0\t9+\tMachine\t3\t0\t3.75\n3.39\t3\t104077\tM\tNeverM\t1975\tL: 300,000 and above\t9000\tProfsc\t0\t2\tProf.\t3\t1\t3.39\n5.56\t5\t101798\tM\tMarried\t1960\tG: 130,000 - 
149,999\t15000\tHS-grad\t1\t3\tExec.\t5\t0\t5.56\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Model quality statistics on the TEST data","message":["%python","","import numpy as np","","# The coefficient of determination R^2 is part of the \"score\" method for OML models","CoefficientOfDeterminationR2 = glm_mod.score(TEST_X, TEST_Y)"," ","# Compute other statistics by hand - no need to pull data locally to Python","# RES_DF[['PREDICTION', 'YRS_RESIDENCE']","MeanAbsoluteError = (abs(RES_DF['YRS_RESIDENCE'] - RES_DF['PREDICTION']).sum())/RES_DF['YRS_RESIDENCE'].count()","MeanSquaredError = ((RES_DF['YRS_RESIDENCE'] - RES_DF['PREDICTION'])**2).sum()/RES_DF['YRS_RESIDENCE'].count()","RootMeanSquaredError = np.sqrt(MeanSquaredError)","","print('R^2: ', CoefficientOfDeterminationR2.round(4))","print('MAE: ', MeanAbsoluteError.round(4))","print('MSE: ', MeanSquaredError.round(4))","print('RMSE: ', RootMeanSquaredError.round(4))"],"enabled":true,"result":{"startTime":1704380969311,"interpreter":"python.low","endTime":1704380969671,"results":[{"message":"R^2: 0.5569\nMAE: 0.8651\nMSE: 1.6673\nRMSE: 
1.2912\n","type":"TEXT"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":"[{\"table\":{\"version\":1},\"scatterplot\":{\"groupByColumns\":[],\"showSeriesX\":[\"YRS_RESIDENCE\"],\"label\":[],\"extraDescription\":[],\"showLabel\":\"no\",\"showDescription\":\"no\",\"lastColumns\":[\"PREDICTION\",\"YRS_RESIDENCE\"],\"version\":1}}]","hideInIFrame":false,"selectedVisualization":"scatterplot","title":"Plot the predicted and actual years of residence ","message":["%python","","z.show(RES_DF[['PREDICTION', 'YRS_RESIDENCE']].head(100))"],"enabled":true,"result":{"startTime":1704380969798,"interpreter":"python.low","endTime":1704380969983,"results":[{"message":"PREDICTION\tYRS_RESIDENCE\n4.57191443901974\t2.0\n1.8358546077171392\t2.0\n5.312181183257389\t5.0\n2.2323988564511543\t2.0\n4.707440334622631\t4.0\n3.0985366454325205\t4.0\n6.827973805136314\t0.0\n3.7514697241400805\t3.0\n3.389984135786385\t3.0\n5.558543569472224\t5.0\n4.931785043571118\t5.0\n3.821346504334991\t1.0\n4.671141595251779\t4.0\n4.637060248997707\t5.0\n3.928007733127923\t4.0\n4.685315294174711\t5.0\n4.272350467119804\t5.0\n3.856041579641236\t3.0\n4.763705072196844\t5.0\n4.393157118465297\t4.0\n2.576899442953469\t4.0\n4.201836156665195\t3.0\n1.2322666658095578\t1.0\n4.304581897574273\t3.0\n3.8575624269465103\t3.0\n5.460466693888613\t5.0\n4.615890688182985\t5.0\n1.752029200165881\t2.0\n5.523615432036287\t5.0\n3.1286523072376253\t3.0\n4.017237980750553\t5.0\n3.649534665295635\t2.0\n4.209938366084536\t6.0\n4.183053991337589\t4.0\n4.720103459218842\t3.0\n1.7869371148460438\t2.0\n5.428604534507215\t6.0\n3.373842654253\t5.0\n4.0534245034282\t5.0\n5.179803954965174\t10.0\n1.3760803288660053\t1.0\n4.965887241808666\t7.0\n7.015572559176409\t7.0\n2.763471212249958\t2.0\n5.727870296291183\t7.0\n2.913572577855
662\t4.0\n4.787647909451225\t5.0\n3.269514294134353\t3.0\n5.251748264297419\t7.0\n4.337527610211263\t4.0\n4.403361748758342\t5.0\n5.530763155593056\t4.0\n2.8202897431844773\t3.0\n2.743044928352869\t3.0\n4.351015663708916\t4.0\n3.0686750672400356\t3.0\n2.3807790690896296\t2.0\n7.156367191586237\t6.0\n4.440458514903465\t9.0\n1.6489176427546925\t1.0\n6.250639807930804\t6.0\n4.367295798221904\t4.0\n5.42467435536096\t6.0\n5.9403541537286\t6.0\n2.6442704547250946\t2.0\n3.2130959589247268\t2.0\n4.3974729014767195\t4.0\n5.736500253879448\t6.0\n1.4447064621960093\t0.0\n3.922643064613891\t3.0\n4.183053991337589\t4.0\n4.865308075564007\t7.0\n4.286962609718261\t4.0\n4.993338829966633\t6.0\n3.9027181635342836\t3.0\n3.3670569631617067\t4.0\n6.08959229229739\t6.0\n2.45238843601096\t1.0\n3.787275137527118\t3.0\n2.538570849790202\t2.0\n2.5111421778298344\t3.0\n3.732700729526654\t4.0\n3.3893333377073587\t4.0\n2.5793226011902117\t3.0\n5.761236072739153\t5.0\n1.0438523152603947\t2.0\n1.8154284561596836\t0.0\n1.9777145389965205\t2.0\n4.073812374671\t3.0\n3.6146461321885575\t3.0\n1.6360232192990298\t2.0\n5.457614570091144\t5.0\n3.9034462503274163\t5.0\n2.5393831245365903\t3.0\n4.727773483841452\t4.0\n5.607211323465545\t6.0\n5.95095004671569\t4.0\n1.4972476421551408\t1.0\n5.958070954521609\t5.0\n1.6347720515702422\t3.0\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Using matplotlib, plot the predicted and actual years of residence","message":["%python","","import numpy as np","import matplotlib.pyplot as plt","plt.style.use('seaborn')","plt.figure(figsize=[9,7])","plt.margins(0.05)","","x = RES_DF[['PREDICTION']].pull()","y = RES_DF[['YRS_RESIDENCE']].pull()","n = len(x)","","plt.plot(x, y, 
'.', c='black', alpha=0.8, ms=7)","","plt.plot( [np.min(y),np.max(y)],[np.min(y),np.max(y)],"," c='red', alpha=0.8, ms=3,"," label='Perfect prediction reference line')","","plt.legend(frameon=True, facecolor='lightgray') ","plt.xlabel('PREDICTION')","plt.ylabel('YRS_RESIDENCE')","plt.title('The prediction vs. actual value for years of residence',"," fontsize=16)","plt.grid(True)","plt.show()"],"enabled":true,"result":{"startTime":1704380970110,"interpreter":"python.low","endTime":1704380970415,"results":[{"message":"<div style='width:auto;height:auto'><img src= style='width=auto;height:auto'><div>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Using matplotlib, plot the residuals","message":["%python","","import numpy as np","import matplotlib.pyplot as plt","plt.style.use('seaborn')","plt.figure(figsize=[9,7])","plt.margins(0.05)","","x = np.matrix(RES_DF[['PREDICTION']].pull())","y = np.matrix(RES_DF[['YRS_RESIDENCE']].pull())","plt.plot(x, y-x, '.', c='black', alpha=0.8, ms=7)","","plt.plot( [np.min(x),np.max(x)],[0,0],"," c='red', alpha=0.8, ms=3,"," label='Perfect prediction (Zero residuals)')","","plt.legend(frameon=True, facecolor='lightgray',loc='upper left') ","plt.xlabel('PREDICTION')","plt.ylabel('RESIDUAL')","plt.title('Prediction of Years of Residence vs. 
Residuals', fontsize=16)","plt.grid(True)","plt.show()"],"enabled":true,"result":{"startTime":1704380970533,"interpreter":"python.low","endTime":1704380970749,"results":[{"message":"<div style='width:auto;height:auto'><img src= style='width=auto;height:auto'><div>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":6,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Display prediction details in python","message":["%python","","","RES_DF = glm_mod.predict(TEST, supplemental_cols = TEST[['CUST_ID']], proba = True, topN_attrs = True)","","z.show(RES_DF.head())"],"enabled":true,"result":{"startTime":1704380970872,"interpreter":"python.low","endTime":1704380979602,"results":[{"message":"CUST_ID\tPREDICTION\tNAME_1\tVALUE_1\tWEIGHT_1\tNAME_2\tVALUE_2\tWEIGHT_2\tNAME_3\tVALUE_3\tWEIGHT_3\tNAME_4\tVALUE_4\tWEIGHT_4\tNAME_5\tVALUE_5\tWEIGHT_5\n100100\t4.472758165280582\tCUST_YEAR_OF_BIRTH\t1959\t.201\tEDUCATION\tMasters\t.155\tAFFINITY_CARD\t1\t.12\tY_BOX_GAMES\t0\t.067\tCUST_CREDIT_LIMIT\t10000\t.015\n100200\t1.5283330601173908\tCUST_CREDIT_LIMIT\t9000\t.006\tCUST_INCOME_LEVEL\tL: 300,000 and above\t-.001\tCUST_MARITAL_STATUS\tNeverM\t-.005\tAFFINITY_CARD\t0\t-.03\tEDUCATION\t< Bach.\t-.075\n100300\t5.060057240486081\tCUST_YEAR_OF_BIRTH\t1961\t.25\tAFFINITY_CARD\t1\t.231\tCUST_INCOME_LEVEL\tG: 130,000 - 149,999\t.147\tY_BOX_GAMES\t0\t.129\tEDUCATION\tBach.\t.089\n100400\t6.096717138233105\tCUST_YEAR_OF_BIRTH\t1944\t.503\tOCCUPATION\tTransp.\t.095\tCUST_MARITAL_STATUS\tDivorc.\t.081\tCUST_INCOME_LEVEL\tC: 50,000 - 69,999\t.07\tY_BOX_GAMES\t0\t.046\n100900\t4.83448125559028\tCUST_YEAR_OF_BIRTH\t1959\t.376\tOCCUPATION\tExec.\t.265\tY_BOX_GAMES\t0\t.126\tCUST_INCOME_LEVEL\tF: 110,000 - 
129,999\t-.072\tAFFINITY_CARD\t0\t-.072\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Materialize GLM_TEST_DATA for use in query below","message":["%python","","try:"," oml.drop(table = 'GLM_TEST_DATA')","except:"," pass","_ = TEST.materialize(table = 'GLM_TEST_DATA')"],"enabled":true,"result":{"startTime":1704380979723,"interpreter":"python.low","endTime":1704380980079,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Display predictions with explanatory prediction details in SQL","message":["%sql","","SELECT CUST_ID,"," round(PREDICTION_YRS_RES,3) PRED_YRS_RES,"," round(PRED_LOWER_LIMIT,1) LOWER_BOUND,"," round(PRED_UPPER_LIMIT,1) UPPER_BOUND,"," RTRIM(TRIM(SUBSTR(OUTPRED.\"Attribute1\",17,100)),'rank=\"1\"/>') FIRST_ATTRIBUTE,"," RTRIM(TRIM(SUBSTR(OUTPRED.\"Attribute2\",17,100)),'rank=\"2\"/>') SECOND_ATTRIBUTE,"," RTRIM(TRIM(SUBSTR(OUTPRED.\"Attribute3\",17,100)),'rank=\"3\"/>') THIRD_ATTRIBUTE","FROM (SELECT CUST_ID,"," PREDICTION(GLM_REGRESSION_MODEL USING *) PREDICTION_YRS_RES,"," PREDICTION_BOUNDS(GLM_REGRESSION_MODEL USING *).LOWER PRED_LOWER_LIMIT,"," PREDICTION_BOUNDS(GLM_REGRESSION_MODEL USING *).UPPER PRED_UPPER_LIMIT,"," PREDICTION_DETAILS(GLM_REGRESSION_MODEL USING *) PD"," FROM GLM_TEST_DATA"," WHERE CUST_ID < 100015"," ORDER BY CUST_ID) OUT,"," XMLTABLE('/Details'"," PASSING OUT.PD"," COLUMNS "," \"Attribute1\" XMLType PATH 'Attribute[1]',"," \"Attribute2\" XMLType 
PATH 'Attribute[2]',"," \"Attribute3\" XMLType PATH 'Attribute[3]') OUTPRED"],"enabled":true,"result":{"startTime":1704380980198,"interpreter":"sql.low","endTime":1704380980459,"results":[{"message":"CUST_ID\tPRED_YRS_RES\tLOWER_BOUND\tUPPER_BOUND\tFIRST_ATTRIBUTE\tSECOND_ATTRIBUTE\tTHIRD_ATTRIBUTE\n100002\t4.186\t3.9\t4.4\t\"CUST_YEAR_OF_BIRTH\" actualValue=\"1962\" weight=\".219\" \t\"Y_BOX_GAMES\" actualValue=\"0\" weight=\".155\" \t\"EDUCATION\" actualValue=\"Bach.\" weight=\".107\" \n100003\t3.619\t3.4\t3.9\t\"Y_BOX_GAMES\" actualValue=\"0\" weight=\".157\" \t\"CUST_INCOME_LEVEL\" actualValue=\"K: 250,000 - 299,999\" weight=\".013\" \t\"CUST_MARITAL_STATUS\" actualValue=\"NeverM\" weight=\"-.015\" \n100005\t5.419\t5.1\t5.8\t\"CUST_YEAR_OF_BIRTH\" actualValue=\"1957\" weight=\".392\" \t\"AFFINITY_CARD\" actualValue=\"1\" weight=\".174\" \t\"EDUCATION\" actualValue=\"Assoc-A\" weight=\".143\" \n100009\t3.499\t3.2\t3.8\t\"AFFINITY_CARD\" actualValue=\"1\" weight=\".146\" \t\"CUST_INCOME_LEVEL\" actualValue=\"G: 130,000 - 149,999\" weight=\".093\" \t\"EDUCATION\" actualValue=\"Bach.\" weight=\".056\" \n100010\t3.006\t2.8\t3.2\t\"CUST_CREDIT_LIMIT\" actualValue=\"9000\" weight=\".013\" \t\"CUST_INCOME_LEVEL\" actualValue=\"L: 300,000 and above\" weight=\"-.003\" \t\"OCCUPATION\" actualValue=\"Crafts\" weight=\"-.034\" \n100012\t5.414\t4.9\t5.9\t\"EDUCATION\" actualValue=\"PhD\" weight=\".395\" \t\"CUST_INCOME_LEVEL\" actualValue=\"H: 150,000 - 169,999\" weight=\".146\" \t\"AFFINITY_CARD\" actualValue=\"1\" weight=\".131\" \n100014\t5.247\t5\t5.5\t\"CUST_YEAR_OF_BIRTH\" actualValue=\"1954\" weight=\".449\" \t\"CUST_MARITAL_STATUS\" actualValue=\"Divorc.\" weight=\".141\" \t\"Y_BOX_GAMES\" actualValue=\"0\" weight=\".08\" 
\n","type":"TABLE"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","# End of Script"],"enabled":true,"result":{"startTime":1704380980584,"interpreter":"md.low","endTime":1704380980681,"results":[{"message":"<h1 id=\"end-of-script\">End of Script<\/h1>\n","type":"HTML"}],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md"],"enabled":true,"result":{"startTime":1704380980822,"interpreter":"md.low","endTime":1704380980918,"results":[],"taskStatus":"SUCCESS","forms":"[]","status":"SUCCESS"},"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"}],"version":"6","snapshot":false,"tags":null}]